required_packages <- c("readxl","dplyr")
# Install and load packages if not already installed
for (package in required_packages) {
  if (!requireNamespace(package, quietly = TRUE)) {
    install.packages(package)
  }
  library(package, character.only = TRUE)
}

#import data frame
# Load the cohort sheet unless a data frame called `df` is already available.
# A plain exists("df") is always TRUE because stats::df() (the F density
# function) is on the search path, which meant the file was never read and the
# later `df[, ...]` failed on a closure. Restricting the lookup to mode "list"
# (data frames and tibbles are lists) skips that function.
if (!exists("df", mode = "list")) {
  file_path <- "~/cohort.xlsx"
  # "#N/A" cells are read as missing values.
  df <- read_excel(file_path, sheet = "cohort", na = "#N/A")
}

#filter df
# Keep mean/min/max/sd SUV columns for every measured region, followed by the
# PET metadata and the clinical covariates used further down.
kept_suv_regions <- c("r_upper", "r_lower", "l_upper", "l_lower",
                      "liver1", "liver2", "aorta")
kept_suv_stats <- c("mean", "min", "max", "sd")
kept_suv_columns <- paste0(rep(kept_suv_regions, each = length(kept_suv_stats)),
                           "_suv_", kept_suv_stats)
kept_covariates <- c("side_r0_l1", "pet_distance_ic", "pet_device",
                     "sex_male", "bmi", "age", "copd", "nicotine",
                     "packyears", "staging", "diabetes",
                     "pleural_effusion", "pericardial_effusion", "chd",
                     "blood_hb", "op", "radio_thorax")
df_filtered <- df[, c(kept_suv_columns, kept_covariates)]

# Force every kept column to numeric (non-numeric entries become NA).
for (column_name in names(df_filtered)) {
  df_filtered[[column_name]] <- as.numeric(df_filtered[[column_name]])
}

# Keep rows with pet_distance_ic in [-1, 365] that were acquired on device 6.
# NOTE(review): the original first filter explicitly kept rows with a missing
# pet_distance_ic, but the following `>= -1` filter dropped them again, so rows
# with a missing distance are excluded. Confirm that this is intended.
scan_distance <- df_filtered$pet_distance_ic
scan_device <- df_filtered$pet_device
kept_rows <- which(scan_distance >= -1 & scan_distance <= 365 & scan_device == 6)
df_filtered <- df_filtered[kept_rows, , drop = FALSE]

#add cols
# Names of the derived columns; they are created empty (NA) here and filled in
# by the calculation steps below.
tfl_stat_cols <- paste0("no_tumor_suv_", c("mean", "min", "max", "sd"))
percentile_cols <- c(paste0("suv_", seq(70, 95, by = 5)),
                     "no_tumor_suv_95", "upper_suv_95", "lower_suv_95")

# Ratio columns of one reference organ: whole lung, lower, upper and no-tumor
# variants, each with mean/max/min/sd.
ratio_cols_for <- function(organ) {
  prefixes <- paste0(organ, c("_ratio_", "_ratio_lower_", "_ratio_upper_",
                              "_no_tumor_ratio_"))
  paste0(rep(prefixes, each = 4), c("mean", "max", "min", "sd"))
}

cols <- c(tfl_stat_cols,
          percentile_cols,
          ratio_cols_for("liver"),
          ratio_cols_for("aorta"),
          "pet_before_immunetherapy", "stage")
df_filtered[, cols] <- NA

#calculate SUV
# Copy the four SUV statistics of region `source` into region `target` for
# every row where the target has no mean value but the source has one.
# Returns the updated data frame.
fill_missing_region <- function(data, target, source) {
  needs_fill <- is.na(data[[paste0(target, "_suv_mean")]]) &
    !is.na(data[[paste0(source, "_suv_mean")]])
  for (stat in c("max", "min", "mean", "sd")) {
    target_col <- paste0(target, "_suv_", stat)
    source_col <- paste0(source, "_suv_", stat)
    data[[target_col]][needs_fill] <- data[[source_col]][needs_fill]
  }
  data
}

# Mirror each pair in both directions so a region measured on only one side
# (or only one liver ROI) is available under both names.
region_pairs <- list(
  c(target = "l_upper", source = "r_upper"),
  c(target = "r_upper", source = "l_upper"),
  c(target = "l_lower", source = "r_lower"),
  c(target = "r_lower", source = "l_lower"),
  c(target = "liver1", source = "liver2"),
  c(target = "liver2", source = "liver1")
)
for (pair in region_pairs) {
  df_filtered <- fill_missing_region(df_filtered, pair[["target"]], pair[["source"]])
}

# no_tumor_* statistics: average of the lower and upper values of the lung
# selected by side_r0_l1 (1 -> left columns, 0 -> right columns), computed only
# when that lung's lower mean is present.
# The previous code called mean(lower, upper, na.rm = TRUE); mean() treats its
# second positional argument as `trim`, so the result was just the lower value
# (and an error when the upper value was NA). The two values are now combined
# before averaging.
tumor_side <- df_filtered$side_r0_l1
no_tumor_rows <- list(
  l = !is.na(tumor_side) & tumor_side == 1 & !is.na(df_filtered$l_lower_suv_mean),
  r = !is.na(tumor_side) & tumor_side == 0 & !is.na(df_filtered$r_lower_suv_mean)
)
for (stat in c("mean", "min", "max", "sd")) {
  for (lung in names(no_tumor_rows)) {
    rows <- no_tumor_rows[[lung]]
    lobes <- cbind(
      df_filtered[[paste0(lung, "_lower_suv_", stat)]][rows],
      df_filtered[[paste0(lung, "_upper_suv_", stat)]][rows]
    )
    # rowMeans on zero selected rows yields numeric(0), so an empty cohort or
    # an unused side is a no-op.
    df_filtered[[paste0("no_tumor_suv_", stat)]][rows] <- rowMeans(lobes, na.rm = TRUE)
  }
}

# Row-wise averages of paired regions. Each entry names the output prefix and
# the two input prefixes; every statistic is averaged with missing values
# ignored. The whole-lung entry relies on the upper/lower entries before it.
paired_regions <- list(
  list(out = "upper_suv_", parts = c("r_upper_suv_", "l_upper_suv_")),
  list(out = "lower_suv_", parts = c("r_lower_suv_", "l_lower_suv_")),
  list(out = "suv_lung_", parts = c("upper_suv_", "lower_suv_")),
  list(out = "liver_suv_", parts = c("liver1_suv_", "liver2_suv_"))
)
for (region in paired_regions) {
  for (stat in c("mean", "max", "min", "sd")) {
    df_filtered[[paste0(region$out, stat)]] <-
      rowMeans(df_filtered[, paste0(region$parts, stat)], na.rm = TRUE)
  }
}

# Derived metrics, computed column-wise. The former `for (i in 1:nrow(...))`
# loop iterated over c(1, 0) and failed when the filtered cohort was empty;
# vectorised arithmetic gives the same values per row and handles zero rows.

# Upper percentiles of whole-lung SUV: mean + z * sd.
lung_percentiles <- c(suv_70 = 0.70, suv_75 = 0.75, suv_80 = 0.80,
                      suv_85 = 0.85, suv_90 = 0.90, suv_95 = 0.95)
for (column in names(lung_percentiles)) {
  df_filtered[[column]] <- df_filtered$suv_lung_mean +
    (qnorm(lung_percentiles[[column]]) * df_filtered$suv_lung_sd)
}

# Rows that have a no-tumor mean. As in the original loop, the no-tumor,
# upper and lower 95th percentiles are only filled for these rows.
# NOTE(review): gating upper_suv_95 / lower_suv_95 on the no-tumor mean looks
# accidental, but it is preserved here - confirm the intent.
has_no_tumor <- !is.na(df_filtered$no_tumor_suv_mean)
z_95 <- qnorm(0.95)
for (region in c("no_tumor", "upper", "lower")) {
  percentile_95 <- df_filtered[[paste0(region, "_suv_mean")]] +
    (z_95 * df_filtered[[paste0(region, "_suv_sd")]])
  df_filtered[[paste0(region, "_suv_95")]][has_no_tumor] <- percentile_95[has_no_tumor]
}

# Lung-to-reference ratios: each lung statistic divided by the same statistic
# of the reference organ (liver, aorta).
lung_prefix_by_infix <- c("_ratio_" = "suv_lung_",
                          "_ratio_upper_" = "upper_suv_",
                          "_ratio_lower_" = "lower_suv_")
for (organ in c("liver", "aorta")) {
  for (stat in c("mean", "max", "min", "sd")) {
    reference <- df_filtered[[paste0(organ, "_suv_", stat)]]
    for (infix in names(lung_prefix_by_infix)) {
      df_filtered[[paste0(organ, infix, stat)]] <-
        df_filtered[[paste0(lung_prefix_by_infix[[infix]], stat)]] / reference
    }
    # No-tumor ratios stay NA where no no-tumor mean exists.
    no_tumor_ratio <- df_filtered[[paste0("no_tumor_suv_", stat)]] / reference
    df_filtered[[paste0(organ, "_no_tumor_ratio_", stat)]][has_no_tumor] <-
      no_tumor_ratio[has_no_tumor]
  }
}

#define variables
# Outcome variables tested below, in table order: SUVmax, SUVmean and the 95th
# percentile for whole / upper / lower / no-tumor lung, then the mean ratios to
# liver and to aorta for the same four regions.
lung_region_prefixes <- c("suv_lung", "upper_suv", "lower_suv", "no_tumor_suv")
ratio_suffixes <- c("_ratio_mean", "_ratio_upper_mean", "_ratio_lower_mean",
                    "_no_tumor_ratio_mean")
variables <- c(paste0(lung_region_prefixes, "_max"),
               paste0(lung_region_prefixes, "_mean"),
               "suv_95", paste0(lung_region_prefixes[-1], "_95"),
               paste0("liver", ratio_suffixes),
               paste0("aorta", ratio_suffixes))

# Make sure every column, including the derived ones, is numeric.
for (column_name in names(df_filtered)) {
  df_filtered[[column_name]] <- as.numeric(df_filtered[[column_name]])
}

#bonferroni
# Significance level divided by the number of tested outcome variables.
variable_count <- length(variables)
adjusted_alpha <- 0.05 / variable_count
bonferroni_message <- paste(variable_count, " variables. Bonferroni adjusted alpha:", adjusted_alpha)
print(bonferroni_message)

#function for binary variables
# Compare every outcome named in the global `variables` between the two groups
# of a binary column of the global `df_filtered` with a Wilcoxon rank-sum test
# (normal approximation, exact = FALSE).
#
# Arguments:
#   var_main  name of the grouping column in df_filtered
#   col0      label for the group with the lower value (first group)
#   col1      label for the group with the higher value (second group)
#
# Returns a data frame with one row per outcome: variable, <col0>_mean,
# <col0>_sd, <col1>_mean, <col1>_sd, p_value, significance and correlation.
#   significance: "***" / "**" / "*" for p below alpha/100, alpha/10, alpha
#                 (alpha = 0.05 / number of outcomes), "." for p < 0.05,
#                 "" otherwise.
#   correlation:  for p < alpha, "+" when the second group has the higher mean,
#                 "-" otherwise; "" when not significant.
#
# Changes from the previous version: rows are no longer appended with c(),
# which silently turned the numeric columns into character; means, sds and
# p_value are now numeric. The unused local `table` (which shadowed
# base::table) is gone, and an NA p-value leaves that row unflagged instead of
# aborting the run.
run_bi_tests <- function(var_main, col0, col1) {
  outcome_names <- as.character(variables)
  n_outcomes <- length(outcome_names)
  #bonferroni
  adjusted_alpha <- 0.05 / n_outcomes
  groups <- df_filtered[[var_main]]

  mean0 <- sd0 <- mean1 <- sd1 <- p_values <- rep(NA_real_, n_outcomes)
  significance <- correlation <- rep("", n_outcomes)

  for (k in seq_len(n_outcomes)) {
    values <- df_filtered[[outcome_names[k]]]

    # Per-group summaries; tapply orders groups by value and ignores rows
    # whose group is NA, so element 1 is the lower group and 2 the higher one.
    group_means <- tapply(values, groups, mean, na.rm = TRUE)
    group_sds <- tapply(values, groups, sd, na.rm = TRUE)
    mean0[k] <- unname(group_means[1])
    sd0[k] <- unname(group_sds[1])
    mean1[k] <- unname(group_means[2])
    sd1[k] <- unname(group_sds[2])

    test <- wilcox.test(values ~ groups, exact = FALSE)
    p <- test$p.value
    p_values[k] <- p
    if (is.na(p)) {
      next
    }

    if (p < adjusted_alpha / 100) {
      significance[k] <- "***"
    } else if (p < adjusted_alpha / 10) {
      significance[k] <- "**"
    } else if (p < adjusted_alpha) {
      significance[k] <- "*"
    } else if (p < 0.05) {
      significance[k] <- "."
    }

    if (p < adjusted_alpha) {
      correlation[k] <- if (mean0[k] - mean1[k] >= 0) "-" else "+"
    }
  }

  results <- data.frame(variable = outcome_names,
                        mean0 = mean0, sd0 = sd0, mean1 = mean1, sd1 = sd1,
                        p_value = p_values,
                        significance = significance,
                        correlation = correlation,
                        stringsAsFactors = FALSE)
  names(results)[2:5] <- c(paste0(col0, "_mean"), paste0(col0, "_sd"),
                           paste0(col1, "_mean"), paste0(col1, "_sd"))
  results
}

#function for numeric variables
# Spearman rank correlation (exact = FALSE) between a numeric column of the
# global `df_filtered` and every outcome named in the global `variables`.
#
# Arguments:
#   var_main  name of the numeric column in df_filtered
#
# Returns a data frame with one row per outcome: variable, estimate (Spearman
# rho), mean, sd, p_value, significance and correlation.
#   mean / sd:    mean and sd of `var_main`, stored in the first row only.
#   significance: "***" / "**" / "*" for p below alpha/100, alpha/10, alpha
#                 (alpha = 0.05 / number of outcomes), "." for p < 0.05,
#                 "" otherwise.
#   correlation:  for p < alpha, "-" when rho < 0 and "+" otherwise; "" when
#                 not significant.
#
# Changes from the previous version: rows are no longer appended with c(),
# which silently turned every column into character; estimate, mean, sd and
# p_value are now numeric, and the formerly blank mean/sd cells are NA. An
# empty `variables` returns an empty table, and an NA p-value leaves that row
# unflagged instead of aborting the run.
run_num_tests <- function(var_main) {
  outcome_names <- as.character(variables)
  n_outcomes <- length(outcome_names)
  #bonferroni
  adjusted_alpha <- 0.05 / n_outcomes
  predictor <- df_filtered[[var_main]]

  estimates <- p_values <- rep(NA_real_, n_outcomes)
  significance <- correlation <- rep("", n_outcomes)

  for (k in seq_len(n_outcomes)) {
    test <- cor.test(df_filtered[[outcome_names[k]]], predictor,
                     method = "spearman", exact = FALSE)
    estimates[k] <- unname(test$estimate)
    p <- test$p.value
    p_values[k] <- p
    if (is.na(p)) {
      next
    }

    if (p < adjusted_alpha / 100) {
      significance[k] <- "***"
    } else if (p < adjusted_alpha / 10) {
      significance[k] <- "**"
    } else if (p < adjusted_alpha) {
      significance[k] <- "*"
    } else if (p < 0.05) {
      significance[k] <- "."
    }

    if (p < adjusted_alpha) {
      correlation[k] <- if (estimates[k] < 0) "-" else "+"
    }
  }

  results <- data.frame(variable = outcome_names,
                        estimate = estimates,
                        mean = rep(NA_real_, n_outcomes),
                        sd = rep(NA_real_, n_outcomes),
                        p_value = p_values,
                        significance = significance,
                        correlation = correlation,
                        stringsAsFactors = FALSE)
  if (n_outcomes > 0) {
    results$mean[1] <- mean(predictor, na.rm = TRUE)
    results$sd[1] <- sd(predictor, na.rm = TRUE)
  }
  results
}

#run functions with variables
# Binary stage indicator: 0 for staging below 3, 1 for staging above 2; rows
# with a missing staging keep their current value.
early_stage_rows <- which(df_filtered$staging < 3)
late_stage_rows <- which(df_filtered$staging > 2)
df_filtered$stage[early_stage_rows] <- 0
df_filtered$stage[late_stage_rows] <- 1

# Binary comparisons: result name suffix -> grouping column, label of the
# lower group, label of the higher group. Each result is stored in a global
# named results_<suffix>.
binary_comparisons <- list(
  sex_male = c("sex_male", "women", "men"),
  copd = c("copd", "noCOPD", "COPD"),
  smoking = c("nicotine", "noSmoking", "Smoking"),
  diabetes = c("diabetes", "noD.M.", "D.M."),
  pleura = c("pleural_effusion", "noPlE", "PlE"),
  pericard = c("pericardial_effusion", "noPE", "PE"),
  chd = c("chd", "noCHD", "CHD"),
  op = c("op", "noOP", "OP"),
  radiation = c("radio_thorax", "noRadio", "Radio"),
  stage = c("stage", "lowerstage", "higherstage")
)
for (suffix in names(binary_comparisons)) {
  comparison <- binary_comparisons[[suffix]]
  assign(paste0("results_", suffix),
         run_bi_tests(comparison[1], comparison[2], comparison[3]))
}

# Rank correlations: result name suffix -> numeric column.
numeric_comparisons <- c(bmi = "bmi", age = "age", py = "packyears", hb = "blood_hb")
for (suffix in names(numeric_comparisons)) {
  assign(paste0("results_", suffix), run_num_tests(numeric_comparisons[[suffix]]))
}

#correlation
# Collect the direction markers ("+", "-", "") of every results_<key> table
# into one wide table with a column per covariate.
binary_result_keys <- c("copd", "smoking", "sex_male", "diabetes",
                        "chd", "op", "radiation", "stage", "pleura", "pericard")
results_correlation_bi <- data.frame(variable = variables, stringsAsFactors = FALSE)
for (key in binary_result_keys) {
  results_correlation_bi[[key]] <- get(paste0("results_", key))$correlation
}

results_correlatio_num <- data.frame(variable = variables, stringsAsFactors = FALSE)
for (key in c("bmi", "age", "py", "hb")) {
  results_correlatio_num[[key]] <- get(paste0("results_", key))$correlation
}

#combine dfs
# Append the numeric-covariate columns; "py" is stored as "packyears".
results_correlation <- results_correlation_bi
numeric_column_source <- c(bmi = "bmi", age = "age", packyears = "py", hb = "hb")
for (target in names(numeric_column_source)) {
  results_correlation[[target]] <- results_correlatio_num[[numeric_column_source[[target]]]]
}

#add rows and sort df
# Insert a header row into the global `results_correlation`.
#
# Arguments:
#   variables  text for the `variable` column of the new row; all other
#              columns are left blank ("")
#   row        number of existing rows that stay above the new row
#              (0 inserts at the top, nrow(results_correlation) appends)
#
# The table is replaced in the enclosing (global) scope, as the callers rely
# on that side effect. Index vectors are built with seq_len() because the
# former `(row + 1):nrow(...)` ran backwards when `row` equalled the row
# count, inserting a spurious NA row plus a duplicate of the last row.
insert_row <- function(variables, row){
  new_row <- data.frame(variable = variables, copd = "", smoking = "",sex_male = "", diabetes = "", chd = "", 
                        op = "", radiation = "", stage = "", pleura = "", pericard = "", bmi = "", age = "", packyears = "", hb = "")
  total_rows <- nrow(results_correlation)
  rows_above <- min(max(row, 0), total_rows)
  above <- seq_len(rows_above)
  below <- seq_len(total_rows - rows_above) + rows_above
  results_correlation <<- rbind(results_correlation[above, ], new_row, results_correlation[below, ])
}
# Section header rows: label -> number of rows that precede it at insertion
# time (four outcome rows per section plus the headers already inserted).
section_headers <- c("SUVMAX" = 0,
                     "SUVMEAN" = 5,
                     "SUV95" = 10,
                     "SUVMEAN lung/liver" = 15,
                     "SUVMEAN lung/blood pool" = 20)
for (header in names(section_headers)) {
  insert_row(header, section_headers[[header]])
}
rownames(results_correlation) <- seq_len(nrow(results_correlation))

#rename variables in df
# Replace the internal outcome names by the lung region they describe.
region_labels <- c(
  suv_lung_max = "whole lung", upper_suv_max = "upper lung",
  lower_suv_max = "lower lung", no_tumor_suv_max = "TFL",
  suv_lung_mean = "whole lung", upper_suv_mean = "upper lung",
  lower_suv_mean = "lower lung", no_tumor_suv_mean = "TFL",
  suv_95 = "whole lung", upper_suv_95 = "upper lung",
  lower_suv_95 = "lower lung", no_tumor_suv_95 = "TFL",
  liver_ratio_mean = "whole lung", liver_ratio_upper_mean = "upper lung",
  liver_ratio_lower_mean = "lower lung", liver_no_tumor_ratio_mean = "TFL",
  aorta_ratio_mean = "whole lung", aorta_ratio_upper_mean = "upper lung",
  aorta_ratio_lower_mean = "lower lung", aorta_no_tumor_ratio_mean = "TFL"
)
for (raw_name in names(region_labels)) {
  results_correlation[results_correlation == raw_name] <- region_labels[[raw_name]]
}

# Final column order (internal name -> printed header).
column_headers <- c(variable = "Variable", sex_male = "Sex", age = "Age",
                    bmi = "BMI", smoking = "Smoking", packyears = "PY",
                    copd = "COPD", stage = "Stage", op = "OP",
                    radiation = "Rad", pleura = "PE", pericard = "PCE",
                    diabetes = "DM", chd = "CHD", hb = "Hb")
results_correlation <- results_correlation[, names(column_headers)]
colnames(results_correlation) <- unname(column_headers)

write.csv(results_correlation, file = "table A2b correlation.csv", row.names = FALSE)
print("saved: table A2b correlation.csv")

#significant
# p-values of one results_<key> table as numbers rounded to six decimals.
rounded_p_values <- function(key) {
  stored <- get(paste0("results_", key))$p_value
  round(as.numeric(as.character(stored)), 6)
}

# One column of p-values per binary covariate ...
results_p_value_bi <- data.frame(variable = variables, stringsAsFactors = FALSE)
for (key in c("copd", "smoking", "sex_male", "diabetes",
              "chd", "op", "radiation", "stage", "pleura", "pericard")) {
  results_p_value_bi[[key]] <- rounded_p_values(key)
}

# ... and per numeric covariate.
results_p_vlaue_num <- data.frame(variable = variables, stringsAsFactors = FALSE)
for (key in c("bmi", "age", "py", "hb")) {
  results_p_vlaue_num[[key]] <- rounded_p_values(key)
}

# Combine both; "py" is stored as "packyears".
results_p_value <- results_p_value_bi
p_value_column_source <- c(bmi = "bmi", age = "age", packyears = "py", hb = "hb")
for (target in names(p_value_column_source)) {
  results_p_value[[target]] <- results_p_vlaue_num[[p_value_column_source[[target]]]]
}

#add rows and sort df
# Insert a header row into the global `results_p_value`.
#
# Arguments:
#   variables  text for the `variable` column of the new row; all other
#              columns are left blank ("")
#   row        number of existing rows that stay above the new row
#              (0 inserts at the top, nrow(results_p_value) appends)
#
# The table is replaced in the enclosing (global) scope, as the callers rely
# on that side effect. Because the new row holds "" in every column, rbind()
# turns the numeric p-value columns into character. Index vectors are built
# with seq_len() because the former `(row + 1):nrow(...)` ran backwards when
# `row` equalled the row count, inserting a spurious NA row plus a duplicate
# of the last row.
insert_row <- function(variables, row){
  new_row <- data.frame(variable = variables, copd = "", smoking = "",sex_male = "", diabetes = "", chd = "", 
                        op = "", radiation = "", stage = "", pleura = "", pericard = "", bmi = "", age = "", packyears = "", hb = "")
  total_rows <- nrow(results_p_value)
  rows_above <- min(max(row, 0), total_rows)
  above <- seq_len(rows_above)
  below <- seq_len(total_rows - rows_above) + rows_above
  results_p_value <<- rbind(results_p_value[above, ], new_row, results_p_value[below, ])
}

# Section header rows: label -> number of rows that precede it at insertion
# time (four outcome rows per section plus the headers already inserted).
section_headers <- c("SUVMAX" = 0,
                     "SUVMEAN" = 5,
                     "SUV95" = 10,
                     "SUVMEAN lung/liver" = 15,
                     "SUVMEAN lung/blood pool" = 20)
for (header in names(section_headers)) {
  insert_row(header, section_headers[[header]])
}
rownames(results_p_value) <- seq_len(nrow(results_p_value))

#rename variables in df
# Replace the internal outcome names by the lung region they describe.
region_labels <- c(
  suv_lung_max = "whole lung", upper_suv_max = "upper lung",
  lower_suv_max = "lower lung", no_tumor_suv_max = "TFL",
  suv_lung_mean = "whole lung", upper_suv_mean = "upper lung",
  lower_suv_mean = "lower lung", no_tumor_suv_mean = "TFL",
  suv_95 = "whole lung", upper_suv_95 = "upper lung",
  lower_suv_95 = "lower lung", no_tumor_suv_95 = "TFL",
  liver_ratio_mean = "whole lung", liver_ratio_upper_mean = "upper lung",
  liver_ratio_lower_mean = "lower lung", liver_no_tumor_ratio_mean = "TFL",
  aorta_ratio_mean = "whole lung", aorta_ratio_upper_mean = "upper lung",
  aorta_ratio_lower_mean = "lower lung", aorta_no_tumor_ratio_mean = "TFL"
)
for (raw_name in names(region_labels)) {
  results_p_value[results_p_value == raw_name] <- region_labels[[raw_name]]
}

# Final column order (internal name -> printed header).
column_headers <- c(variable = "Variable", sex_male = "Sex", age = "Age",
                    bmi = "BMI", smoking = "Smoking", packyears = "PY",
                    copd = "COPD", stage = "Stage", op = "OP",
                    radiation = "Rad", pleura = "PE", pericard = "PCE",
                    diabetes = "DM", chd = "CHD", hb = "Hb")
results_p_value <- results_p_value[, names(column_headers)]
colnames(results_p_value) <- unname(column_headers)

write.csv(results_p_value, file = "table A2a overview.csv", row.names = FALSE)
print("saved: table A2a overview.csv")
print(results_p_value)

#estimate
# Wide table of Spearman estimates. The binary comparisons have no estimate,
# so their columns are filled with NA; the previous loop also fetched each
# results_<key> table into an unused variable, which has been removed.
results_estimate_bi <- data.frame(variable = variables, stringsAsFactors = FALSE)
for (key in c("copd", "smoking", "sex_male", "diabetes",
              "chd", "op", "radiation", "stage", "pleura", "pericard")) {
  results_estimate_bi[[key]] <- NA
}

# Estimates of the numeric covariates, rounded to six decimals.
results_estimate_num <- data.frame(variable = variables, stringsAsFactors = FALSE)
for (key in c("bmi", "age", "py", "hb")) {
  stored_estimate <- get(paste0("results_", key))$estimate
  results_estimate_num[[key]] <- round(as.numeric(as.character(stored_estimate)), 6)
}

#combine dfs
# "py" is stored as "packyears" in the combined table.
results_estimate <- results_estimate_bi
results_estimate$bmi <- results_estimate_num$bmi
results_estimate$age <- results_estimate_num$age
results_estimate$packyears <- results_estimate_num$py
results_estimate$hb <- results_estimate_num$hb

#add rows and sort df
# Insert a header row into the global `results_estimate`.
#
# Arguments:
#   variables  text for the `variable` column of the new row; all other
#              columns are left blank ("")
#   row        number of existing rows that stay above the new row
#              (0 inserts at the top, nrow(results_estimate) appends)
#
# The table is replaced in the enclosing (global) scope, as the callers rely
# on that side effect. Because the new row holds "" in every column, rbind()
# turns the other columns into character. Index vectors are built with
# seq_len() because the former `(row + 1):nrow(...)` ran backwards when `row`
# equalled the row count, inserting a spurious NA row plus a duplicate of the
# last row.
insert_row <- function(variables, row){
  new_row <- data.frame(variable = variables, copd = "", smoking = "",sex_male = "", diabetes = "", chd = "", 
                        op = "", radiation = "", stage = "", pleura = "", pericard = "", bmi = "", age = "", packyears = "", hb = "")
  total_rows <- nrow(results_estimate)
  rows_above <- min(max(row, 0), total_rows)
  above <- seq_len(rows_above)
  below <- seq_len(total_rows - rows_above) + rows_above
  results_estimate <<- rbind(results_estimate[above, ], new_row, results_estimate[below, ])
}
# Section header rows: label -> number of rows that precede it at insertion
# time (four outcome rows per section plus the headers already inserted).
section_headers <- c("SUVMAX" = 0,
                     "SUVMEAN" = 5,
                     "SUV95" = 10,
                     "SUVMEAN lung/liver" = 15,
                     "SUVMEAN lung/blood pool" = 20)
for (header in names(section_headers)) {
  insert_row(header, section_headers[[header]])
}
rownames(results_estimate) <- seq_len(nrow(results_estimate))

#rename
# Replace the internal outcome names by the lung region they describe.
region_labels <- c(
  suv_lung_max = "whole lung", upper_suv_max = "upper lung",
  lower_suv_max = "lower lung", no_tumor_suv_max = "TFL",
  suv_lung_mean = "whole lung", upper_suv_mean = "upper lung",
  lower_suv_mean = "lower lung", no_tumor_suv_mean = "TFL",
  suv_95 = "whole lung", upper_suv_95 = "upper lung",
  lower_suv_95 = "lower lung", no_tumor_suv_95 = "TFL",
  liver_ratio_mean = "whole lung", liver_ratio_upper_mean = "upper lung",
  liver_ratio_lower_mean = "lower lung", liver_no_tumor_ratio_mean = "TFL",
  aorta_ratio_mean = "whole lung", aorta_ratio_upper_mean = "upper lung",
  aorta_ratio_lower_mean = "lower lung", aorta_no_tumor_ratio_mean = "TFL"
)
for (raw_name in names(region_labels)) {
  results_estimate[results_estimate == raw_name] <- region_labels[[raw_name]]
}

# Final column order (internal name -> printed header).
column_headers <- c(variable = "Variable", sex_male = "Sex", age = "Age",
                    bmi = "BMI", smoking = "Smoking", packyears = "PY",
                    copd = "COPD", stage = "Stage", op = "OP",
                    radiation = "Rad", pleura = "PE", pericard = "PCE",
                    diabetes = "DM", chd = "CHD", hb = "Hb")
results_estimate <- results_estimate[, names(column_headers)]
colnames(results_estimate) <- unname(column_headers)

#save
write.csv(results_estimate, file = "table A2c estimate.csv", row.names = FALSE)
print("saved: table A2c estimate")
print(results_estimate)